{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Examples and Exercises from Think Stats, 2nd Edition\n",
    "\n",
    "http://thinkstats2.com\n",
    "\n",
    "Copyright 2016 Allen B. Downey\n",
    "\n",
    "MIT License: https://opensource.org/licenses/MIT\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function, division\n",
    "\n",
    "import nsfg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Examples from Chapter 1\n",
    "\n",
    "Read NSFG data into a Pandas DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caseid</th>\n",
       "      <th>pregordr</th>\n",
       "      <th>howpreg_n</th>\n",
       "      <th>howpreg_p</th>\n",
       "      <th>moscurrp</th>\n",
       "      <th>nowprgdk</th>\n",
       "      <th>pregend1</th>\n",
       "      <th>pregend2</th>\n",
       "      <th>nbrnaliv</th>\n",
       "      <th>multbrth</th>\n",
       "      <th>...</th>\n",
       "      <th>laborfor_i</th>\n",
       "      <th>religion_i</th>\n",
       "      <th>metro_i</th>\n",
       "      <th>basewgt</th>\n",
       "      <th>adj_mod_basewgt</th>\n",
       "      <th>finalwgt</th>\n",
       "      <th>secu_p</th>\n",
       "      <th>sest</th>\n",
       "      <th>cmintvw</th>\n",
       "      <th>totalwgt_lb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3410.389399</td>\n",
       "      <td>3869.349602</td>\n",
       "      <td>6448.271112</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.8125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3410.389399</td>\n",
       "      <td>3869.349602</td>\n",
       "      <td>6448.271112</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.8750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7226.301740</td>\n",
       "      <td>8567.549110</td>\n",
       "      <td>12999.542264</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.1250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7226.301740</td>\n",
       "      <td>8567.549110</td>\n",
       "      <td>12999.542264</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>7.0000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7226.301740</td>\n",
       "      <td>8567.549110</td>\n",
       "      <td>12999.542264</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.1875</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 244 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   caseid  pregordr  howpreg_n  howpreg_p  moscurrp  nowprgdk  pregend1  \\\n",
       "0       1         1        NaN        NaN       NaN       NaN       6.0   \n",
       "1       1         2        NaN        NaN       NaN       NaN       6.0   \n",
       "2       2         1        NaN        NaN       NaN       NaN       5.0   \n",
       "3       2         2        NaN        NaN       NaN       NaN       6.0   \n",
       "4       2         3        NaN        NaN       NaN       NaN       6.0   \n",
       "\n",
       "   pregend2  nbrnaliv  multbrth     ...       laborfor_i  religion_i  metro_i  \\\n",
       "0       NaN       1.0       NaN     ...                0           0        0   \n",
       "1       NaN       1.0       NaN     ...                0           0        0   \n",
       "2       NaN       3.0       5.0     ...                0           0        0   \n",
       "3       NaN       1.0       NaN     ...                0           0        0   \n",
       "4       NaN       1.0       NaN     ...                0           0        0   \n",
       "\n",
       "       basewgt  adj_mod_basewgt      finalwgt  secu_p  sest  cmintvw  \\\n",
       "0  3410.389399      3869.349602   6448.271112       2     9      NaN   \n",
       "1  3410.389399      3869.349602   6448.271112       2     9      NaN   \n",
       "2  7226.301740      8567.549110  12999.542264       2    12      NaN   \n",
       "3  7226.301740      8567.549110  12999.542264       2    12      NaN   \n",
       "4  7226.301740      8567.549110  12999.542264       2    12      NaN   \n",
       "\n",
       "   totalwgt_lb  \n",
       "0       8.8125  \n",
       "1       7.8750  \n",
       "2       9.1250  \n",
       "3       7.0000  \n",
       "4       6.1875  \n",
       "\n",
       "[5 rows x 244 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg = nsfg.ReadFemPreg()\n",
    "preg.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Print the column names."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['caseid', 'pregordr', 'howpreg_n', 'howpreg_p', 'moscurrp', 'nowprgdk',\n",
       "       'pregend1', 'pregend2', 'nbrnaliv', 'multbrth',\n",
       "       ...\n",
       "       'laborfor_i', 'religion_i', 'metro_i', 'basewgt', 'adj_mod_basewgt',\n",
       "       'finalwgt', 'secu_p', 'sest', 'cmintvw', 'totalwgt_lb'],\n",
       "      dtype='object', length=244)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a single column name."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'pregordr'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.columns[1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a column and check what type it is."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.series.Series"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pregordr = preg['pregordr']\n",
    "type(pregordr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Print a column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        1\n",
       "1        2\n",
       "2        1\n",
       "3        2\n",
       "4        3\n",
       "5        1\n",
       "6        2\n",
       "7        3\n",
       "8        1\n",
       "9        2\n",
       "10       1\n",
       "11       1\n",
       "12       2\n",
       "13       3\n",
       "14       1\n",
       "15       2\n",
       "16       3\n",
       "17       1\n",
       "18       2\n",
       "19       1\n",
       "20       2\n",
       "21       1\n",
       "22       2\n",
       "23       1\n",
       "24       2\n",
       "25       3\n",
       "26       1\n",
       "27       1\n",
       "28       2\n",
       "29       3\n",
       "        ..\n",
       "13563    2\n",
       "13564    3\n",
       "13565    1\n",
       "13566    1\n",
       "13567    1\n",
       "13568    2\n",
       "13569    1\n",
       "13570    2\n",
       "13571    3\n",
       "13572    4\n",
       "13573    1\n",
       "13574    2\n",
       "13575    1\n",
       "13576    1\n",
       "13577    2\n",
       "13578    1\n",
       "13579    2\n",
       "13580    1\n",
       "13581    2\n",
       "13582    3\n",
       "13583    1\n",
       "13584    2\n",
       "13585    1\n",
       "13586    2\n",
       "13587    3\n",
       "13588    1\n",
       "13589    2\n",
       "13590    3\n",
       "13591    4\n",
       "13592    5\n",
       "Name: pregordr, Length: 13593, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pregordr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a single element from a column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pregordr[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a slice from a column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2    1\n",
       "3    2\n",
       "4    3\n",
       "Name: pregordr, dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pregordr[2:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select a column using dot notation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "pregordr = preg.pregordr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Count the number of times each value occurs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    9148\n",
       "2    1862\n",
       "3     120\n",
       "4    1921\n",
       "5     190\n",
       "6     352\n",
       "Name: outcome, dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.outcome.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Check the values of another variable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0        8\n",
       "1.0       40\n",
       "2.0       53\n",
       "3.0       98\n",
       "4.0      229\n",
       "5.0      697\n",
       "6.0     2223\n",
       "7.0     3049\n",
       "8.0     1889\n",
       "9.0      623\n",
       "10.0     132\n",
       "11.0      26\n",
       "12.0      10\n",
       "13.0       3\n",
       "14.0       3\n",
       "15.0       1\n",
       "Name: birthwgt_lb, dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.birthwgt_lb.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make a dictionary that maps from each respondent's `caseid` to a list of indices into the pregnancy `DataFrame`.  Use it to select the pregnancy outcomes for a single respondent."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4, 4, 4, 4, 4, 4, 1])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "caseid = 10229\n",
    "preg_map = nsfg.MakePregMap(preg)\n",
    "indices = preg_map[caseid]\n",
    "preg.outcome[indices].values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Exercises"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select the `birthord` column, print the value counts, and compare to results published in the [codebook](http://www.icpsr.umich.edu/nsfg6/Controller?displayPage=labelDetails&fileCode=PREG&section=A&subSec=8016&srtLabel=611933)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solution goes here"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can also use `isnull` to count the number of nans."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4445"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.birthord.isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select the `prglngth` column, print the value counts, and compare to results published in the [codebook](http://www.icpsr.umich.edu/nsfg6/Controller?displayPage=labelDetails&fileCode=PREG&section=A&subSec=8016&srtLabel=611931)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solution goes here"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To compute the mean of a column, you can invoke the `mean` method on a Series.  For example, here is the mean birthweight in pounds:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7.265628457623368"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg.totalwgt_lb.mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a new column named <tt>totalwgt_kg</tt> that contains birth weight in kilograms.  Compute its mean.  Remember that when you create a new column, you have to use dictionary syntax, not dot notation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solution goes here"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`nsfg.py` also provides `ReadFemResp`, which reads the female respondents file and returns a `DataFrame`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "resp = nsfg.ReadFemResp()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`DataFrame` provides a method `head` that displays the first five rows:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caseid</th>\n",
       "      <th>rscrinf</th>\n",
       "      <th>rdormres</th>\n",
       "      <th>rostscrn</th>\n",
       "      <th>rscreenhisp</th>\n",
       "      <th>rscreenrace</th>\n",
       "      <th>age_a</th>\n",
       "      <th>age_r</th>\n",
       "      <th>cmbirth</th>\n",
       "      <th>agescrn</th>\n",
       "      <th>...</th>\n",
       "      <th>pubassis_i</th>\n",
       "      <th>basewgt</th>\n",
       "      <th>adj_mod_basewgt</th>\n",
       "      <th>finalwgt</th>\n",
       "      <th>secu_r</th>\n",
       "      <th>sest</th>\n",
       "      <th>cmintvw</th>\n",
       "      <th>cmlstyr</th>\n",
       "      <th>screentime</th>\n",
       "      <th>intvlngth</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2298</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>27</td>\n",
       "      <td>27</td>\n",
       "      <td>902</td>\n",
       "      <td>27</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1234</td>\n",
       "      <td>1222</td>\n",
       "      <td>18:26:36</td>\n",
       "      <td>110.492667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5012</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>42</td>\n",
       "      <td>42</td>\n",
       "      <td>718</td>\n",
       "      <td>42</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2335.279149</td>\n",
       "      <td>2846.799490</td>\n",
       "      <td>4744.191350</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1233</td>\n",
       "      <td>1221</td>\n",
       "      <td>16:30:59</td>\n",
       "      <td>64.294000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>11586</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "      <td>43</td>\n",
       "      <td>43</td>\n",
       "      <td>708</td>\n",
       "      <td>43</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2335.279149</td>\n",
       "      <td>2846.799490</td>\n",
       "      <td>4744.191350</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1234</td>\n",
       "      <td>1222</td>\n",
       "      <td>18:19:09</td>\n",
       "      <td>75.149167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6794</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>1042</td>\n",
       "      <td>15</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3783.152221</td>\n",
       "      <td>5071.464231</td>\n",
       "      <td>5923.977368</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1234</td>\n",
       "      <td>1222</td>\n",
       "      <td>15:54:43</td>\n",
       "      <td>28.642833</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>616</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>20</td>\n",
       "      <td>20</td>\n",
       "      <td>991</td>\n",
       "      <td>20</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>5341.329968</td>\n",
       "      <td>6437.335772</td>\n",
       "      <td>7229.128072</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1233</td>\n",
       "      <td>1221</td>\n",
       "      <td>14:19:44</td>\n",
       "      <td>69.502667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 3087 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   caseid  rscrinf  rdormres  rostscrn  rscreenhisp  rscreenrace  age_a  \\\n",
       "0    2298        1         5         5            1          5.0     27   \n",
       "1    5012        1         5         1            5          5.0     42   \n",
       "2   11586        1         5         1            5          5.0     43   \n",
       "3    6794        5         5         4            1          5.0     15   \n",
       "4     616        1         5         4            1          5.0     20   \n",
       "\n",
       "   age_r  cmbirth  agescrn     ...      pubassis_i      basewgt  \\\n",
       "0     27      902       27     ...               0  3247.916977   \n",
       "1     42      718       42     ...               0  2335.279149   \n",
       "2     43      708       43     ...               0  2335.279149   \n",
       "3     15     1042       15     ...               0  3783.152221   \n",
       "4     20      991       20     ...               0  5341.329968   \n",
       "\n",
       "   adj_mod_basewgt     finalwgt  secu_r  sest  cmintvw  cmlstyr  screentime  \\\n",
       "0      5123.759559  5556.717241       2    18     1234     1222    18:26:36   \n",
       "1      2846.799490  4744.191350       2    18     1233     1221    16:30:59   \n",
       "2      2846.799490  4744.191350       2    18     1234     1222    18:19:09   \n",
       "3      5071.464231  5923.977368       2    18     1234     1222    15:54:43   \n",
       "4      6437.335772  7229.128072       2    18     1233     1221    14:19:44   \n",
       "\n",
       "    intvlngth  \n",
       "0  110.492667  \n",
       "1   64.294000  \n",
       "2   75.149167  \n",
       "3   28.642833  \n",
       "4   69.502667  \n",
       "\n",
       "[5 rows x 3087 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resp.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Select the `age_r` column from `resp` and print the value counts.  How old are the youngest and oldest respondents?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solution goes here"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can use the `caseid` to match up rows from `resp` and `preg`.  For example, we can select the row from `resp` for `caseid` 2298 like this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caseid</th>\n",
       "      <th>rscrinf</th>\n",
       "      <th>rdormres</th>\n",
       "      <th>rostscrn</th>\n",
       "      <th>rscreenhisp</th>\n",
       "      <th>rscreenrace</th>\n",
       "      <th>age_a</th>\n",
       "      <th>age_r</th>\n",
       "      <th>cmbirth</th>\n",
       "      <th>agescrn</th>\n",
       "      <th>...</th>\n",
       "      <th>pubassis_i</th>\n",
       "      <th>basewgt</th>\n",
       "      <th>adj_mod_basewgt</th>\n",
       "      <th>finalwgt</th>\n",
       "      <th>secu_r</th>\n",
       "      <th>sest</th>\n",
       "      <th>cmintvw</th>\n",
       "      <th>cmlstyr</th>\n",
       "      <th>screentime</th>\n",
       "      <th>intvlngth</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2298</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>27</td>\n",
       "      <td>27</td>\n",
       "      <td>902</td>\n",
       "      <td>27</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>1234</td>\n",
       "      <td>1222</td>\n",
       "      <td>18:26:36</td>\n",
       "      <td>110.492667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1 rows × 3087 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   caseid  rscrinf  rdormres  rostscrn  rscreenhisp  rscreenrace  age_a  \\\n",
       "0    2298        1         5         5            1          5.0     27   \n",
       "\n",
       "   age_r  cmbirth  agescrn     ...      pubassis_i      basewgt  \\\n",
       "0     27      902       27     ...               0  3247.916977   \n",
       "\n",
       "   adj_mod_basewgt     finalwgt  secu_r  sest  cmintvw  cmlstyr  screentime  \\\n",
       "0      5123.759559  5556.717241       2    18     1234     1222    18:26:36   \n",
       "\n",
       "    intvlngth  \n",
       "0  110.492667  \n",
       "\n",
       "[1 rows x 3087 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "resp[resp.caseid==2298]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And we can get the corresponding rows from `preg` like this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>caseid</th>\n",
       "      <th>pregordr</th>\n",
       "      <th>howpreg_n</th>\n",
       "      <th>howpreg_p</th>\n",
       "      <th>moscurrp</th>\n",
       "      <th>nowprgdk</th>\n",
       "      <th>pregend1</th>\n",
       "      <th>pregend2</th>\n",
       "      <th>nbrnaliv</th>\n",
       "      <th>multbrth</th>\n",
       "      <th>...</th>\n",
       "      <th>laborfor_i</th>\n",
       "      <th>religion_i</th>\n",
       "      <th>metro_i</th>\n",
       "      <th>basewgt</th>\n",
       "      <th>adj_mod_basewgt</th>\n",
       "      <th>finalwgt</th>\n",
       "      <th>secu_p</th>\n",
       "      <th>sest</th>\n",
       "      <th>cmintvw</th>\n",
       "      <th>totalwgt_lb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2610</th>\n",
       "      <td>2298</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.8750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2611</th>\n",
       "      <td>2298</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.5000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2612</th>\n",
       "      <td>2298</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.1875</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2613</th>\n",
       "      <td>2298</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3247.916977</td>\n",
       "      <td>5123.759559</td>\n",
       "      <td>5556.717241</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.8750</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4 rows × 244 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      caseid  pregordr  howpreg_n  howpreg_p  moscurrp  nowprgdk  pregend1  \\\n",
       "2610    2298         1        NaN        NaN       NaN       NaN       6.0   \n",
       "2611    2298         2        NaN        NaN       NaN       NaN       6.0   \n",
       "2612    2298         3        NaN        NaN       NaN       NaN       6.0   \n",
       "2613    2298         4        NaN        NaN       NaN       NaN       6.0   \n",
       "\n",
       "      pregend2  nbrnaliv  multbrth     ...       laborfor_i  religion_i  \\\n",
       "2610       NaN       1.0       NaN     ...                0           0   \n",
       "2611       NaN       1.0       NaN     ...                0           0   \n",
       "2612       NaN       1.0       NaN     ...                0           0   \n",
       "2613       NaN       1.0       NaN     ...                0           0   \n",
       "\n",
       "      metro_i      basewgt  adj_mod_basewgt     finalwgt  secu_p  sest  \\\n",
       "2610        0  3247.916977      5123.759559  5556.717241       2    18   \n",
       "2611        0  3247.916977      5123.759559  5556.717241       2    18   \n",
       "2612        0  3247.916977      5123.759559  5556.717241       2    18   \n",
       "2613        0  3247.916977      5123.759559  5556.717241       2    18   \n",
       "\n",
       "      cmintvw  totalwgt_lb  \n",
       "2610      NaN       6.8750  \n",
       "2611      NaN       5.5000  \n",
       "2612      NaN       4.1875  \n",
       "2613      NaN       6.8750  \n",
       "\n",
       "[4 rows x 244 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "preg[preg.caseid==2298]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "How old is the respondent with `caseid` 1?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solution goes here"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What are the pregnancy lengths for the respondent with `caseid` 2298?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solution goes here"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What was the birthweight of the first baby born to the respondent with `caseid` 5012?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Solution goes here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
